# Scrapy settings for baidu_project project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

# Project identity: the bot name and the spider package(s).
BOT_NAME = "baidu_project"

# The package used for newly created spiders is also the only entry in the
# list of spider modules, so the string is written once and reused.
NEWSPIDER_MODULE = "baidu_project.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]


# User agent string (set to a desktop Chrome value so the crawler is less
# likely to be identified as a bot).
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)

# Whether to obey robots.txt rules. NOTE: this is set to False, so robots.txt
# rules are NOT obeyed by this project.
ROBOTSTXT_OBEY = False

# Enable item pipelines: maps the pipeline class path to its order value.
ITEM_PIPELINES = {"baidu_project.pipelines.BaiduProjectPipeline": 300}

# Throttle the crawl so requests are not sent too quickly (avoids being banned).
DOWNLOAD_DELAY = 1  # 1-second interval between requests

# Future-proof overrides: these settings have deprecated default values, so
# they are pinned explicitly here.
FEED_EXPORT_ENCODING = "utf-8"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
